* better
[mascara-docs.git] / lang / C / sorting.and.searching.cormen.algo / Introduction to Algorithms, Third Edition / code / ext.c
blob7f4fe6eb76d5c533e38f62006c4d501539518299
1 /* external sort */
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 /****************************
8 * implementation dependent *
9 ****************************/
11 /* template for workfiles (8.3 format) */
12 #define FNAME "_sort%03d.dat"
13 #define LNAME 13
15 /* comparison operators */
16 #define compLT(x,y) (x < y)
17 #define compGT(x,y) (x > y)
19 /* define the record to be sorted here */
20 #define LRECL 100
21 typedef int keyType;
22 typedef struct recTypeTag {
23 keyType key; /* sort key for record */
24 #if LRECL
25 char data[LRECL-sizeof(keyType)]; /* other fields */
26 #endif
27 } recType;
29 /******************************
30 * implementation independent *
31 ******************************/
33 typedef enum {false, true} bool;
35 typedef struct tmpFileTag {
36 FILE *fp; /* file pointer */
37 char name[LNAME]; /* filename */
38 recType rec; /* last record read */
39 int dummy; /* number of dummy runs */
40 bool eof; /* end-of-file flag */
41 bool eor; /* end-of-run flag */
42 bool valid; /* true if rec is valid */
43 int fib; /* ideal fibonacci number */
44 } tmpFileType;
46 static tmpFileType **file; /* array of file info for tmp files */
47 static int nTmpFiles; /* number of tmp files */
48 static char *ifName; /* input filename */
49 static char *ofName; /* output filename */
51 static int level; /* level of runs */
52 static int nNodes; /* number of nodes for selection tree */
54 void deleteTmpFiles(void) {
55 int i;
57 /* delete merge files and free resources */
58 if (file) {
59 for (i = 0; i < nTmpFiles; i++) {
60 if (file[i]) {
61 if (file[i]->fp) fclose(file[i]->fp);
62 if (*file[i]->name) remove(file[i]->name);
63 free (file[i]);
66 free (file);
70 void termTmpFiles(int rc) {
72 /* cleanup files */
73 remove(ofName);
74 if (rc == 0) {
75 int fileT;
77 /* file[T] contains results */
78 fileT = nTmpFiles - 1;
79 fclose(file[fileT]->fp); file[fileT]->fp = NULL;
80 if (rename(file[fileT]->name, ofName)) {
81 perror("io1");
82 deleteTmpFiles();
83 exit(1);
85 *file[fileT]->name = 0;
87 deleteTmpFiles();
90 void cleanExit(int rc) {
92 /* cleanup tmp files and exit */
93 termTmpFiles(rc);
94 exit(rc);
97 void *safeMalloc(size_t size) {
98 void *p;
100 /* safely allocate memory and initialize to zero */
101 if ((p = calloc(1, size)) == NULL) {
102 printf("error: malloc failed, size = %d\n", size);
103 cleanExit(1);
105 return p;
108 void initTmpFiles(void) {
109 int i;
110 tmpFileType *fileInfo;
112 /* initialize merge files */
113 if (nTmpFiles < 3) nTmpFiles = 3;
114 file = safeMalloc(nTmpFiles * sizeof(tmpFileType*));
115 fileInfo = safeMalloc(nTmpFiles * sizeof(tmpFileType));
116 for (i = 0; i < nTmpFiles; i++) {
117 file[i] = fileInfo + i;
118 sprintf(file[i]->name, FNAME, i);
119 if ((file[i]->fp = fopen(file[i]->name, "w+b")) == NULL) {
120 perror("io2");
121 cleanExit(1);
126 recType *readRec(void) {
128 typedef struct iNodeTag { /* internal node */
129 struct iNodeTag *parent;/* parent of internal node */
130 struct eNodeTag *loser; /* external loser */
131 } iNodeType;
133 typedef struct eNodeTag { /* external node */
134 struct iNodeTag *parent;/* parent of external node */
135 recType rec; /* input record */
136 int run; /* run number */
137 bool valid; /* input record is valid */
138 } eNodeType;
140 typedef struct nodeTag {
141 iNodeType i; /* internal node */
142 eNodeType e; /* external node */
143 } nodeType;
145 static nodeType *node; /* array of selection tree nodes */
146 static eNodeType *win; /* new winner */
147 static FILE *ifp; /* input file */
148 static bool eof; /* true if end-of-file, input */
149 static int maxRun; /* maximum run number */
150 static int curRun; /* current run number */
151 iNodeType *p; /* pointer to internal nodes */
152 static bool lastKeyValid; /* true if lastKey is valid */
153 static keyType lastKey; /* last key written */
155 /* read next record using replacement selection */
157 /* check for first call */
158 if (node == NULL) {
159 int i;
161 if (nNodes < 2) nNodes = 2;
162 node = safeMalloc(nNodes * sizeof(nodeType));
163 for (i = 0; i < nNodes; i++) {
164 node[i].i.loser = &node[i].e;
165 node[i].i.parent = &node[i/2].i;
166 node[i].e.parent = &node[(nNodes + i)/2].i;
167 node[i].e.run = 0;
168 node[i].e.valid = false;
170 win = &node[0].e;
171 lastKeyValid = false;
173 if ((ifp = fopen(ifName, "rb")) == NULL) {
174 printf("error: file %s, unable to open\n", ifName);
175 cleanExit(1);
179 while (1) {
181 /* replace previous winner with new record */
182 if (!eof) {
183 if (fread(&win->rec, sizeof(recType), 1, ifp) == 1) {
184 if ((!lastKeyValid || compLT(win->rec.key, lastKey))
185 && (++win->run > maxRun))
186 maxRun = win->run;
187 win->valid = true;
188 } else if (feof(ifp)) {
189 fclose(ifp);
190 eof = true;
191 win->valid = false;
192 win->run = maxRun + 1;
193 } else {
194 perror("io4");
195 cleanExit(1);
197 } else {
198 win->valid = false;
199 win->run = maxRun + 1;
202 /* adjust loser and winner pointers */
203 p = win->parent;
204 do {
205 bool swap;
206 swap = false;
207 if (p->loser->run < win->run) {
208 swap = true;
209 } else if (p->loser->run == win->run) {
210 if (p->loser->valid && win->valid) {
211 if (compLT(p->loser->rec.key, win->rec.key))
212 swap = true;
213 } else {
214 swap = true;
217 if (swap) {
218 /* p should be winner */
219 eNodeType *t;
221 t = p->loser;
222 p->loser = win;
223 win = t;
225 p = p->parent;
226 } while (p != &node[0].i);
228 /* end of run? */
229 if (win->run != curRun) {
230 /* win->run = curRun + 1 */
231 if (win->run > maxRun) {
232 /* end of output */
233 free(node);
234 return NULL;
236 curRun = win->run;
239 /* output top of tree */
240 if (win->run) {
241 lastKey = win->rec.key;
242 lastKeyValid = true;
243 return &win->rec;
248 void makeRuns(void) {
249 recType *win; /* winner */
250 int fileT; /* last file */
251 int fileP; /* next to last file */
252 int j; /* selects file[j] */
255 /* Make initial runs using replacement selection.
256 * Runs are written using a Fibonacci distintbution.
259 /* initialize file structures */
260 fileT = nTmpFiles - 1;
261 fileP = fileT - 1;
262 for (j = 0; j < fileT; j++) {
263 file[j]->fib = 1;
264 file[j]->dummy = 1;
266 file[fileT]->fib = 0;
267 file[fileT]->dummy = 0;
269 level = 1;
270 j = 0;
273 win = readRec();
274 while (win) {
275 bool anyrun;
277 anyrun = false;
278 for (j = 0; win && j <= fileP; j++) {
279 bool run;
281 run = false;
282 if (file[j]->valid) {
283 if (!compLT(win->key, file[j]->rec.key)) {
284 /* append to an existing run */
285 run = true;
286 } else if (file[j]->dummy) {
287 /* start a new run */
288 file[j]->dummy--;
289 run = true;
291 } else {
292 /* first run in file */
293 file[j]->dummy--;
294 run = true;
297 if (run) {
298 anyrun = true;
300 /* flush run */
301 while(1) {
302 if (fwrite(win, sizeof(recType), 1, file[j]->fp) != 1) {
303 perror("io3");
304 cleanExit(1);
306 file[j]->rec.key = win->key;
307 file[j]->valid = true;
308 if ((win = readRec()) == NULL) break;
309 if (compLT(win->key, file[j]->rec.key)) break;
314 /* if no room for runs, up a level */
315 if (!anyrun) {
316 int t;
317 level++;
318 t = file[0]->fib;
319 for (j = 0; j <= fileP; j++) {
320 file[j]->dummy = t + file[j+1]->fib - file[j]->fib;
321 file[j]->fib = t + file[j+1]->fib;
327 void rewindFile(int j) {
328 /* rewind file[j] and read in first record */
329 file[j]->eor = false;
330 file[j]->eof = false;
331 rewind(file[j]->fp);
332 if (fread(&file[j]->rec, sizeof(recType), 1, file[j]->fp) != 1) {
333 if (feof(file[j]->fp)) {
334 file[j]->eor = true;
335 file[j]->eof = true;
336 } else {
337 perror("io5");
338 cleanExit(1);
343 void mergeSort(void) {
344 int fileT;
345 int fileP;
346 int j;
347 tmpFileType *tfile;
349 /* polyphase merge sort */
351 fileT = nTmpFiles - 1;
352 fileP = fileT - 1;
354 /* prime the files */
355 for (j = 0; j < fileT; j++) {
356 rewindFile(j);
359 /* each pass through loop merges one run */
360 while (level) {
361 while(1) {
362 bool allDummies;
363 bool anyRuns;
365 /* scan for runs */
366 allDummies = true;
367 anyRuns = false;
368 for (j = 0; j <= fileP; j++) {
369 if (!file[j]->dummy) {
370 allDummies = false;
371 if (!file[j]->eof) anyRuns = true;
375 if (anyRuns) {
376 int k;
377 keyType lastKey;
379 /* merge 1 run file[0]..file[P] --> file[T] */
381 while(1) {
382 /* each pass thru loop writes 1 record to file[fileT] */
384 /* find smallest key */
385 k = -1;
386 for (j = 0; j <= fileP; j++) {
387 if (file[j]->eor) continue;
388 if (file[j]->dummy) continue;
389 if (k < 0 ||
390 (k != j && compGT(file[k]->rec.key, file[j]->rec.key)))
391 k = j;
393 if (k < 0) break;
395 /* write record[k] to file[fileT] */
396 if (fwrite(&file[k]->rec, sizeof(recType), 1,
397 file[fileT]->fp) != 1) {
398 perror("io6");
399 cleanExit(1);
402 /* replace record[k] */
403 lastKey = file[k]->rec.key;
404 if (fread(&file[k]->rec, sizeof(recType), 1,
405 file[k]->fp) == 1) {
406 /* check for end of run on file[s] */
407 if (compLT(file[k]->rec.key, lastKey))
408 file[k]->eor = true;
409 } else if (feof(file[k]->fp)) {
410 file[k]->eof = true;
411 file[k]->eor = true;
412 } else {
413 perror("io7");
414 cleanExit(1);
418 /* fixup dummies */
419 for (j = 0; j <= fileP; j++) {
420 if (file[j]->dummy) file[j]->dummy--;
421 if (!file[j]->eof) file[j]->eor = false;
424 } else if (allDummies) {
425 for (j = 0; j <= fileP; j++)
426 file[j]->dummy--;
427 file[fileT]->dummy++;
430 /* end of run */
431 if (file[fileP]->eof && !file[fileP]->dummy) {
432 /* completed a fibonocci-level */
433 level--;
434 if (!level) {
435 /* we're done, file[fileT] contains data */
436 return;
439 /* fileP is exhausted, reopen as new */
440 fclose(file[fileP]->fp);
441 if ((file[fileP]->fp = fopen(file[fileP]->name, "w+b"))
442 == NULL) {
443 perror("io8");
444 cleanExit(1);
446 file[fileP]->eof = false;
447 file[fileP]->eor = false;
449 rewindFile(fileT);
451 /* f[0],f[1]...,f[fileT] <-- f[fileT],f[0]...,f[T-1] */
452 tfile = file[fileT];
453 memmove(file + 1, file, fileT * sizeof(tmpFileType*));
454 file[0] = tfile;
456 /* start new runs */
457 for (j = 0; j <= fileP; j++)
458 if (!file[j]->eof) file[j]->eor = false;
466 void extSort(void) {
467 initTmpFiles();
468 makeRuns();
469 mergeSort();
470 termTmpFiles(0);
473 int main(int argc, char *argv[]) {
475 /* command-line:
477 * ext ifName ofName nTmpFiles nNodes
479 * ext in.dat out.dat 5 2000
480 * reads in.dat, sorts using 5 files and 2000 nodes, output to out.dat
482 if (argc != 5) {
483 printf("%s ifName ofName nTmpFiles nNodes\n", argv[0]);
484 cleanExit(1);
487 ifName = argv[1];
488 ofName = argv[2];
489 nTmpFiles = atoi(argv[3]);
490 nNodes = atoi(argv[4]);
492 printf("extSort: nFiles=%d, nNodes=%d, lrecl=%d\n",
493 nTmpFiles, nNodes, sizeof(recType));
495 extSort();
497 return 0;